library(tidyverse)
library(parallel)
library(CoRC)
We will define a function to help us run tasks in parallel on all cores:
# Run `fun` over every element of `data` in parallel on all available cores.
#
# @param data    Vector or list of elements to iterate over.
# @param fun     Function (or purrr-style mapper formula) applied to each element.
# @param ...     Additional arguments forwarded to `fun`.
# @param .export Character vector of global variable names to export to workers.
# @param .prep   Quoted expression evaluated once on each worker (e.g. to load
#                packages and the model) before the map starts.
# @return A list with one result per element of `data`.
mapInParallel <- function(data, fun, ..., .export = character(), .prep = {}) {
  cl <- makeCluster(detectCores())
  # Guarantee the worker processes are shut down even if the map below
  # errors, so a failed run never leaks a cluster.
  on.exit(stopCluster(cl), add = TRUE)
  clusterExport(cl = cl, .export)
  # Evaluate the preparation expression on every worker; `envir` spelled
  # out in full (was partially matched as `env`).
  clusterCall(cl = cl, fun = eval, .prep, envir = .GlobalEnv)
  # Load-balanced parallel lapply; chunking amortizes scheduling overhead.
  parLapplyLB(cl = cl, X = data, fun = as_mapper(fun), ..., chunk.size = 50)
}
This is an example from the Condor-Copasi paper 1.
First, we always have to load our model:
# Load the Kummer calcium oscillation model straight from the
# condor-copasi examples repository.
loadModel("https://raw.githubusercontent.com/copasi/condor-copasi/master/examples/stochastic_test.cps")
## # A COPASI model reference:
## Model name: "Kummer calcium model"
## Number of compartments: 1
## Number of species: 3
## Number of reactions: 8
Now, we will set our timecourse settings and save our model to a string so we can load it again later for our parallel processing.
# Configure the time course to use the stochastic direct method and to
# honor an explicitly supplied random seed (each run sets its own later).
setTimeCourseSettings(method = list(method = "directMethod",
use_random_seed = TRUE))
# Serialize the configured model so it can be exported to the workers
# and reloaded there.
model_string <- saveModelToString()
Next, we will set everything up to run 1000 time courses in parallel. We need to export our model to the workers, as well as load CoRC and our model for each worker. Afterwards, we set 1000 random seeds so each time course will get a different random seed. Lastly, we call the timecourse function.
# Run 1000 stochastic time courses in parallel, one per random seed.
timeseries <- mapInParallel(
  # iteration data: one random seed per run
  1:1000,
  # iteration function: run a single time course with the given seed
  function(seed) runTimeCourse(method = list(random_seed = seed))$result,
  # make the serialized model available on every worker
  .export = "model_string",
  # worker setup: load CoRC and the model once per worker
  .prep = quote({
    library(CoRC)
    loadModelFromString(model_string)
  })
)
Now, we will reshape our data to plot it later on: timeseries is a list of 1000 tibbles with each tibble a time course.
# Collapse the 1000 stochastic runs into the mean and standard deviation
# of each species at every time point, in long format for plotting.
plotdata <-
  timeseries %>%
  bind_rows() %>%
  group_by(Time) %>%
  # mean and sd over all runs for every time point; columns become
  # "a_mean", "a_sd", "b_mean", ...
  summarise(across(everything(), list(mean = mean, sd = sd)), .groups = "drop") %>%
  # split column names into species (a, b, c) and statistic (mean, sd)
  # while lengthening; replaces the superseded separate() step
  pivot_longer(
    -Time,
    names_to = c("species", "type"),
    names_sep = "_"
  ) %>%
  # spread the statistic back out into `mean` and `sd` columns
  pivot_wider(names_from = type, values_from = value)
head(plotdata)
## # A tibble: 6 x 4
## Time species mean sd
## <dbl> <chr> <dbl> <dbl>
## 1 0 a 8.00 0
## 2 0 b 8.00 0
## 3 0 c 8.00 0
## 4 0.05 a 7.06 0.249
## 5 0.05 b 8.12 0.117
## 6 0.05 c 5.60 0.442
Now, we will plot the data:
# Mean trajectory per species with a +/- 1 sd ribbon. `tt_sd` is a dummy
# aesthetic whose only purpose is to carry the sd value into the plotly
# tooltip below.
plot <-
  ggplot(plotdata, aes(x = Time, y = mean, group = species, tt_sd = sd)) +
  geom_ribbon(
    aes(ymin = mean - sd, ymax = mean + sd, fill = species),
    alpha = 1 / 4
  ) +
  geom_line(aes(color = species)) +
  # the ribbon fill legend duplicates the line color legend
  guides(fill = "none") +
  labs(
    x = paste0("Time (", getTimeUnit(), ")"),
    y = paste0("Concentration (", getQuantityUnit(), ")"),
    color = "Species"
  )
# Interactive version; tooltip shows species, time, mean and sd.
plotly::ggplotly(plot, tooltip = c("group", "x", "y", "tt_sd"))
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
To free some space, we can unload the model:
unloadModel()
Very similarly we will now scan over random concentrations of two species. This implements an example from the Mendes 2009 paper 2 on COPASI use cases.
We will load the model, set the steady state settings, and save our model as a string to load it in parallel later.
# Load the Curien 2003 methionine/threonine synthesis model from
# BioModels in SBML form.
loadSBML("https://www.ebi.ac.uk/biomodels/model/download/BIOMD0000000068.2?filename=BIOMD0000000068_url.xml")
## # A COPASI model reference:
## Model name: "Curien2003_MetThr_synthesis"
## Number of compartments: 1
## Number of species: 9
## Number of reactions: 3
# Skip the Jacobian and stability analysis; only the fluxes are needed.
setSteadyStateSettings(calculate_jacobian = FALSE, perform_stability_analysis = FALSE)
# Serialize the configured model for export to the parallel workers.
model_string <- saveModelToString()
This time, we will run 10,000 steady-state calculations with random concentrations of two species (cysteine and adomed).
We will export the model to the workers, load CoRC and the model. Then we will assign random concentrations and run a steady state calculation:
# Run 10000 steady-state calculations in parallel, each with randomized
# initial concentrations of cysteine and adomed.
scan <- mapInParallel(
  # iteration data: one random seed per steady-state run
  1:10000,
  # iteration function: randomize concentrations, then solve for the
  # steady state and return the two fluxes of interest
  function(seed) {
    set.seed(seed)
    # cysteine: log-uniform draw spanning 0.3 to 300
    cys_conc <- 0.3 * 10 ^ runif(1L, min = 0, max = 3)
    # adomed: uniform draw between 0 and 100
    ado_conc <- runif(1L, min = 0, max = 100)
    setSpecies(
      key = c("Cysteine", "adenosyl"),
      initial_concentration = c(cys_conc, ado_conc)
    )
    ss <- runSteadyState()
    # abort the run if no steady state was found for this draw
    stopifnot(ss$result == "found")
    list(
      cysteine = cys_conc,
      adomed = ado_conc,
      CGS = ss$reactions$flux[2],
      TS = ss$reactions$flux[3]
    )
  },
  # make the serialized model available on every worker
  .export = "model_string",
  # worker setup: load CoRC and the model once per worker
  .prep = quote({
    library(CoRC)
    loadModelFromString(model_string)
  })
)
Now we can reshape and plot the data:
# Combine the 10000 result lists into one tibble and stack the two flux
# columns into long format (one row per reaction and run).
plotdata <- bind_rows(scan) %>%
  pivot_longer(c(CGS, TS), names_to = "reaction", values_to = "flux")
# Scatter of steady-state flux against the adomed concentration for both
# reactions. `tt_cys` is a dummy aesthetic that only carries the cysteine
# concentration into the plotly tooltip below.
plot <-
  ggplot(plotdata, aes(x = adomed, y = flux, group = reaction, tt_cys = cysteine)) +
  geom_point(
    aes(color = reaction),
    alpha = 1 / 10,
    size = 3 / 4
  ) +
  labs(
    x = paste0("Adomed (", getQuantityUnit(), ")"),
    y = paste0("Flux (", getQuantityUnit(), " / ", getTimeUnit(), ")"),
    color = "Species"
  )
# Interactive version; tooltip shows cysteine, adomed and flux.
plotly::ggplotly(plot, tooltip = c("tt_cys", "x", "y"))
At the end we can again unload the model to free up memory.
unloadModel()
Kent, Edward, Stefan Hoops, and Pedro Mendes. “Condor-COPASI: high-throughput computing for biochemical networks.” BMC systems biology 6.1 (2012): 91.↩︎
Mendes P., Hoops S., Sahle S., Gauges R., Dada J., Kummer U. (2009) Computational Modeling of Biochemical Networks Using COPASI. In: Maly I. (eds) Systems Biology. Methods in Molecular Biology (Methods and Protocols), vol 500. Humana Press. https://doi.org/10.1007/978-1-59745-525-1_2↩︎